#install.packages("MCMCpack", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("coda", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("mcmcplots", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("xtable", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("R2HTML", repos="http://cran.r-project.org", lib="~/R_libs")
#install.packages("rjags", repos="http://cran.r-project.org", lib="~/R_libs")

#install.packages("MCMCpack", repos="http://cran.r-project.org")
#install.packages("coda", repos="http://cran.r-project.org")
#install.packages("mcmcplots", repos="http://cran.r-project.org")
#install.packages("xtable", repos="http://cran.r-project.org")
#install.packages("R2HTML", repos="http://cran.r-project.org")
#install.packages("rjags", repos="http://cran.r-project.org")



rm(list = ls())
set.seed(12345)

######### FLAGS  ###########################################################################
set.seed(12345)                                                                            #
testing = FALSE          # INDICATE WHETHER RUN ON PC OR SGE                               #
savemcmc = FALSE          # INDICATE WHETHER TO SAVE THETA MCMC INTO CSV                    #
ind.prediction = FALSE   # INDICATE WHETHER INDIVIDUAL PREDICTION IS TO BE SAVED           #
                                                                                           #
setwd(paste0("~/Bin_123/"))		# Comment out setwd if running on desktop              #
filename = "IRT123bin"                                                                    #
data = read.csv(paste0(filename,".csv"), header=TRUE)                                      #
                                                                                           #
### END OF FLAGS ###########################################################################


## Special command to remove question per question
# "q512" "q5164" "q5171" "q5172" "q6071"
# data = data[, -which(colnames(data)=="q512")]                                              



if (testing) {
   library(coda)
   library(MCMCpack)
   library(mcmcplots)
   library(xtable)
   library(R2HTML)
   library(rjags)
} else {
          library(coda, lib="~/R_libs")
          library(MCMCpack, lib="~/R_libs")
          library(mcmcplots, lib="~/R_libs")
          library(xtable, lib="~/R_libs")
          library(R2HTML, lib="~/R_libs")
		      library(rjags, lib="~/R_libs")
}                                    





M = which(colnames(data)=="wave")-1     			#number of questions


empty = which(apply(!is.na(data[,1:M]),1,sum)==0)   #respondent with all NAs
data = data[-empty,]


N = nrow(data)  #number of respondents



if (testing) {
   N = 800
   index = sample(x=nrow(data), size=N, replace=FALSE)
   data = data[index,]
   rownames(data) <- NULL
}



########################################################################
#Calculate Anchors: Most and Least Extreme Respondents
########################################################################

#raw scores of respondents, calculated from summing answers to items
#can do that because items haven been recoded so that higher values equal more extreme answer
score = rowSums(data[,1:M], na.rm=TRUE)
lowest = min(score)
highest = max(score)

#most and least extreme respondents
leastextreme = paste0("V",which(score==lowest)[sample(length(which(score==lowest)),1)])
mostextreme = paste0("V",which(score==highest)[sample(length(which(score==highest)),1)])

constraints=list(lib="-", con="+")
names(constraints)=c(leastextreme,mostextreme)



########################################################################
#Graded Response Model from JSS Code Snippet
########################################################################


## MCMC settings ###################################
nadapt <- 100
thin = 50
niter = 100*thin
nburnin =  1000

if (testing) {
  nadapt <- 100 
  thin = 20
  niter = 10*thin
  nburnin = 50
}


## specify data to pass #######################
Y = data[,1:M]		# Create Y or response matrix
				         	# M is column index for the last question in the dataset

p <- ncol(Y)		# number of questions
n <- nrow(Y)		# number of respondents
m.beta = 1			# mean beta
s.beta = 1			# sd beta
m.alpha = 0			# mean alpha
s.alpha = 1			# sd alpha


## creating matrix for probabilities
prob = matrix(NA,nrow=n,ncol=p)


# IRT data 
irt.dat <- list(Y=Y, n=n, p=p, m.beta=m.beta, s.beta=s.beta, m.alpha=m.alpha, s.alpha=s.alpha, prob=prob)

# initial ideal points
inits <- list(theta=(score>mean(score)) - .5,   #give .5 if more extreme than average and -.5 otherwise
			  .RNG.name="base::Wichmann-Hill",
			  .RNG.seed=12345
		     )


# create the JAGS model object
jagsModel <- jags.model(
	file = "modelBinaryJAGS.txt",                # specification of the model
  data= irt.dat,                         # data for the model
	inits=inits,                           # initial values for ideal points
	n.chains=1,                            # number of MCMC chains
	n.adapt=nadapt                         # number of iterations of MCMC adaptation, not saved
	)



# run the MCMC for a while without saving the samples	
set.seed(12345)
update(jagsModel,n.iter=nburnin) # number of iterations of MCMC burn in, not saved	

	
# save sample from the posterior distribution
irt.out <- jags.samples(
	jagsModel,                                            # model to sample from
	variable.names=c("theta","beta","prob"),              # names of all variables to save
	n.iter = niter,                                       # number of iterations to sample
	thin = thin
	)	


## extract the samples
## need to be transposed because original output is N x Niter
theta <- as.mcmc(t(irt.out$theta[,,1]))
colnames(theta) = paste0("V",row.names(Y))
beta <- as.mcmc(t(irt.out$beta[,,1]))
colnames(beta) = colnames(Y)


## create prediction matrix for posterior predictive
## format r1q1, r1q2, ..., r2q1, r2q2, ..., rnqp
pred = matrix(rep(NA,(niter/thin)*n*p), nrow=(niter/thin))

for (iter in 1:(niter/thin)) {
    start = end = 0
    
    for (i in 1:n) {
        start = end + 1
        end = start + p - 1
        mat = irt.out$prob[i,,iter,1]
        pred[iter, start:end] = as.numeric(mat > .5)
    }
}

if (ind.prediction) {
	write.csv(pred, paste(filename,"Individual Prediction.csv"),row.names=FALSE)
}


## save theta MCMC
if (savemcmc) {
  write.csv(x=theta,file=paste(filename, "Theta MCMC.csv"),row.names=FALSE)    
}


########################################################################
#CHECK ANCHOR
########################################################################

pdf(paste(filename, "Theta Most Extreme.pdf"))
  index = which(score==highest)[sample(length(which(score==highest)),min(sum(score==highest),15))]
  caterplot(theta[,index],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), denstrip=TRUE, col="gray", style="plain", pch=NA)
  caterplot(theta[,index],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), col="red", add=TRUE)
dev.off()

pdf(paste(filename, "Theta Least Extreme.pdf"))
  index = which(score==lowest)[sample(length(which(score==lowest)),min(sum(score==lowest),15))]
  caterplot(theta[,index],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), denstrip=TRUE, col="gray", style="plain", pch=NA)
  caterplot(theta[,index],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), col="red", add=TRUE)
dev.off()



########################################################################
#BETA CODA
########################################################################

sum = summary(beta)$statistics
betaorder = row.names(sum[order(-sum[,1]),])

pdf(paste(filename, "Beta Plot.pdf"))
caterplot(beta[,betaorder],quantiles=list(outer=c(0.025,0.975), inner=c(0.025,0.975)), denstrip=TRUE, col="gray", style="plain", pch=NA)
caterplot(beta[,betaorder],quantiles=list(outer=c(0.025,0.975), inner=c(0.025,0.975)), col="red", add=TRUE)
dev.off()

pdf(paste(filename, "Beta AutoCorr.pdf"))
autocorr.plot(beta,lag.max=75)
dev.off()

pdf(paste(filename, "Beta Geweke.pdf"))
geweke.plot(beta)
dev.off()

HTML(geweke.diag(beta),file=paste(filename, "Beta Geweke.html"),append=FALSE)


########################################################################
#SUMMARY STATISTICS
########################################################################

## Beta
sum.beta = as.matrix(summary(beta)$statistics)
write.csv(sum.beta,paste(filename, "Beta Statistics.csv"))

sum.beta = as.matrix(summary(beta)$quantiles)
write.csv(sum.beta,paste(filename, "Beta Quantiles.csv"))


## Thetas
sum.theta = data.frame(as.matrix(summary(theta)$statistics), data)
write.csv(sum.theta,paste(filename, "Theta Statistics.csv"), row.names = FALSE)
sum.theta = data.frame(as.matrix(summary(theta)$quantiles), data)
write.csv(sum.theta,paste(filename, "Theta Quantiles.csv"), row.names = FALSE)


########################################################################
# ACCURACY STATISTICS
# 1) ACCURACY FOR EACH RESPONDENT ACROSS ITEMS
# 2) ACCURACY FOR EACH RESPONDENT ITEM BY ITEM
########################################################################

## calculate percent correct for each respondent (all items)
## format: pct.correct[iteration, respondent]
pct.correct = matrix(NA, nrow=nrow(pred), ncol=n)
for (i in 1:n) {
  start = (i-1)*p + 1         
  end = (i*p)
  pct.correct[,i] = apply(pred[,start:end], 1, function(x) sum(x==Y[i,], na.rm=TRUE))/sum(!is.na(Y[i,]), na.rm=TRUE)
}


## calculate percent correct for each respondent (item by item)
## format: correct.item[item,iteration,respondent]
correct.item = array(data=NA, dim=c(p,nrow(pred),n))
for (i in 1:p) {
  for (j in 1:n) {
    index = (j-1)*p + i             # this is index for respondent-j on question-i
    correct.item[i,,j] = as.numeric(pred[,index] == Y[j,i])
  }
}


## mode for each item, needed for APRE
mode = rep(NA,p)
minority = rep(NA,p)
for (i in 1:p) {
  ux <- unique(Y[,i])
  mode[i] = ux[which.max(tabulate(match(Y[,i], ux)))]
  minority[i] = sum(Y[,i] != mode[i], na.rm=TRUE)
}


## calculate total correct and incorrect for each item, needed for APRE
total.correct = matrix(NA, nrow=nrow(pred), ncol=p)
total.incorrect = matrix(NA, nrow=nrow(pred), ncol=p)
  

for (i in 1:nrow(pred)) {
  for (j in 1:p) {
    total.correct[i,j] = sum(correct.item[j,i,]==1, na.rm=TRUE)
    total.incorrect[i,j] = sum(correct.item[j,i,]==0, na.rm=TRUE)
  }
}


## calculate APRE and accuracy 
APRE = rep(NA,nrow(pred))                 # APRE for each iteration
for (i in 1:nrow(pred)) {
    APRE[i] = sum(minority - total.incorrect[i,])/sum(minority)
}

total.correct.all = apply(total.correct,1,sum)               # total correct across items
total.incorrect.all = apply(total.incorrect,1,sum)           # total incorrect across items
accuracy = total.correct.all/(total.correct.all+total.incorrect.all)


out = matrix(NA, nrow=nrow(pred), ncol=2)
out[,1] = APRE
out[,2] = accuracy
colnames(out) = c("APRE","Accuracy")

write.csv(out,paste(filename, "Aggregate Accuracy.csv"),row.names=FALSE)


########################################################################
# COUNTRY STATISTICS
# 1) POSTERIOR PREDICTIVE DISTRIBUTION ACROSS ITEMS, COUNTRY LEVEL
# 2) Posterior DISTRIBUTION
########################################################################


weight = data$weight
countrylist = unique(data$countrywave)
countrylist = sort(countrylist)
numcountry = length(countrylist)

countrymat.pred = countrymat.theta = matrix(NA,nrow=nrow(pred),ncol=numcountry)
colnames(countrymat.pred) = colnames(countrymat.theta) <- countrylist

countrymat.item = array(NA,dim=c(p,nrow(pred),numcountry))


x.max = max(summary(theta[,1:N])$statistics[,1])
x.min = min(summary(theta[,1:N])$statistics[,1])


## create country matrix
pdf(paste(filename, "Country Distributions.pdf"))
for (i in 1:numcountry) {
  index = which(data$countrywave==countrylist[i])
  
  votermat.pred = pct.correct[,index]
  votermat.theta = theta[,index]
  
  if (length(index)<2) {
    countrymat.pred[,i]=votermat.pred
    countrymat.theta[,i]=votermat.theta
  } else { 
    countrymat.pred[,i] = (votermat.pred%*%weight[index])/sum(weight[index])
    countrymat.theta[,i] = (votermat.theta%*%weight[index])/sum(weight[index])
  }
  
  ## item by item posterior predictive
  for (j in 1:p) {
    index = which((data$countrywave==countrylist[i]) & (!is.na(Y[,j])))
    if (length(index)<1) next;        # meaning: the question isn't fielded in that country
    
    votermat.item = correct.item[j,,index]
    
    if (length(index)<2) {
      countrymat.item[j,,i]=votermat.item
    } else { 
      countrymat.item[j,,i] = (votermat.item%*%weight[index])/sum(weight[index])
    }
  }
  
  
  # this part is to build country by country kernel density
  X = summary(votermat.theta)$statistics[,1]
  d=density(X)
  max.d = max(d$y)
  max.h = max(hist(X,plot=FALSE)$density)
  plot(d$x,rep(NA,length(d$x)),xlim=c(x.min-.2,x.max+.2),ylim=c(0,max(max.h,max.d)+.01),xlab="Islamism",ylab="Density",main=countrylist[i])
  lines(d, lwd=2)
  hist(X, add=TRUE, prob=TRUE,lwd=1)
  abline(v=mean(countrymat.theta[,i]), lty=2, lwd=2)
}
dev.off()


##add for overall wave
wavemat.theta = matrix(NA,nrow=nrow(pred),ncol=length(unique(data$wave)))
colnames(wavemat.theta) <- paste("Wave", sort(unique(data$wave)))

for (i in (1:ncol(wavemat.theta))) {
  index = which(data$wave==sort(unique(data$wave))[i])
  votermat.theta = theta[,index]
  wavemat.theta[,i] = rowMeans(votermat.theta)
}


countrymat.theta = data.frame(countrymat.theta, wavemat.theta)



## country theta posterior
countrymcmc.theta = as.mcmc(countrymat.theta)
write.csv(x=countrymcmc.theta,file=paste(filename, "Country Theta MCMC.csv"),row.names=FALSE)

sum = summary(countrymcmc.theta)$statistics
ctyorder = colnames(countrymcmc.theta)
pdf(paste(filename, "Country Theta Plot.pdf"))
caterplot(countrymcmc.theta[,ctyorder],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), 
          col="red", style="plain", reorder=FALSE)
dev.off()

png(paste(filename, "Country Theta Plot.png"), width=3000, height=7000, res = 600)
par(mar = c(2, 0.5, 0.5, 0.5), xaxs = "i")
caterplot(countrymcmc.theta[,ctyorder],quantiles=list(outer=c(0.025,0.975),inner=c(0.025,0.975)), 
          col="red", style="plain", reorder=FALSE, labels.loc = 'above')
dev.off()


pdf(paste(filename, "Country Theta Plot Summary.pdf"))
plot(countrymcmc.theta)
dev.off()

pdf(paste(filename, "Country Theta AutoCorr.pdf"))
autocorr.plot(countrymcmc.theta,lag.max=75)
dev.off()

pdf(paste(filename, "Country Theta Geweke Plot.pdf"))
geweke.plot(countrymcmc.theta)
dev.off()

HTML("<b>Country Theta Geweke</b>",file=paste(filename, "Country Theta Geweke.html"),append=FALSE)
HTML(geweke.diag(countrymcmc.theta),file=paste(filename, "Country Theta Geweke.html"),append=TRUE)

sum = as.matrix(summary(countrymcmc.theta)$statistics)
write.csv(sum, paste(filename, "Country Theta Statistics.csv"), row.names = TRUE)

sum = as.matrix(summary(countrymcmc.theta)$quantiles)
write.csv(sum, paste(filename, "Country Theta Quantiles.csv"), row.names = TRUE)

